# -*- coding: utf-8 -*-
# @Time : 2021/8/5 10:48
# @Author : 龙王赘婿_彪少
# @File : readwrite.py
# @Software: PyCharm

# Download the .jpg images referenced by a saved HTML page and store them locally.

import re
import urllib
import urllib.request

from bs4 import BeautifulSoup  # HTML parsing
from lxml import html

# Parse the locally saved page. The context manager closes the handle as soon
# as parsing is done (the previous bare `file.close` was never actually called).
with open("./pptok.html", mode="rb") as file:
    soup = BeautifulSoup(file, "lxml")

# The dot is escaped so that only a literal ".jpg" matches; an unescaped "."
# would also accept URLs such as ".../ajpg".
pat = re.compile(r"\.jpg")

# Downloaded files are written as ./pic0.jpg, ./pic1.jpg, ... in document order.
path = './pic'
i = 0
for img in soup.find_all('img'):
    src = img.get('src')
    # <img> tags without a src attribute yield None, which re.search rejects.
    if not src or not pat.search(src):
        continue
    print(src)
    try:
        urllib.request.urlretrieve(src, path + str(i) + ".jpg")
    except (OSError, ValueError) as err:
        # URLError/HTTPError derive from OSError; ValueError is raised for
        # relative or scheme-less URLs. Skip this image rather than abort
        # the remaining downloads.
        print("skipped", src, "->", err)
        continue
    # Only advance the counter on success so file numbers stay consecutive.
    i = i + 1
